from urllib.parse import urljoin

import requests
from lxml import etree

class DataPackage:
    """Scrape a case-list page and the detail page linked from each case card."""

    @staticmethod
    def _first(nodes, default=""):
        """Return the first XPath match in *nodes*, or *default* if there is none."""
        return nodes[0] if nodes else default

    def dataList(self, headers, url, timeout=30):
        """Collect every case card on the list page plus its detail-page text.

        Args:
            headers: HTTP headers sent with the list request and with every
                detail request.
            url: Address of the case-list page.
            timeout: Value passed to ``requests.get`` as ``timeout`` for each
                request, so a stalled server cannot block the scrape forever.

        Returns:
            A dict mapping the column names "行业" (industry), "标题" (title),
            "内容" (summary), "详情链接" (detail link) and "详情内容" (detail
            text) to lists holding one entry per card, in page order. A value
            that cannot be found in the markup is stored as ``""`` so that all
            five columns always have the same length.

        Raises:
            requests.RequestException: If a request fails or times out.
        """
        response = requests.get(url, headers=headers, timeout=timeout)
        list_tree = etree.HTML(response.text)
        cards = list_tree.xpath('//section[@id="caseList"]/div/div[2]/div[1]/div')

        data_excel = {"行业": [], "标题": [], "内容": [], "详情链接": [], "详情内容": []}
        for card in cards:
            # Industry label.
            trade = DataPackage._first(card.xpath('./a/div[2]/div[2]/span/text()'))
            data_excel["行业"].append(trade)
            # Title.
            title = DataPackage._first(card.xpath('./a/div[2]/div[3]/h3/text()'))
            data_excel["标题"].append(title)
            # Summary text.
            content = DataPackage._first(card.xpath('./a/div[2]/div[3]/p/text()'))
            data_excel["内容"].append(content)
            # NOTE: the card image (./a/div[1]/span/img/@src) is not collected.

            href = DataPackage._first(card.xpath('./a/@href'))
            if not href:
                # No link on this card: keep the columns aligned and skip the
                # detail request instead of fetching a bogus URL.
                data_excel["详情链接"].append("")
                data_excel["详情内容"].append("")
                continue

            # Resolve against the final (post-redirect) list URL; this handles
            # root-relative, relative and absolute hrefs alike.
            detail_url = urljoin(response.url, href)
            data_excel["详情链接"].append(detail_url)

            # Detail page: flatten the content container to a single string.
            detail_response = requests.get(detail_url, headers=headers, timeout=timeout)
            detail_tree = etree.HTML(detail_response.text)
            detail_text = detail_tree.xpath(
                'string(//*[@id="caseDetailContent"]/div/div[1]/div)'
            )
            data_excel["详情内容"].append(detail_text)
        return data_excel
